import python

/**
 * Holds if the text of comment `c` has the shape of a `def` statement:
 * `#`, an optional non-whitespace prefix followed by whitespace, `def`,
 * a parenthesised section, a colon and, optionally, a trailing `#` comment.
 */
private predicate def_statement(Comment c) {
    exists(string text | text = c.getText() |
        text.regexpMatch("#(\\S*\\s+)?def\\s.*\\(.*\\).*:\\s*(#.*)?")
    )
}

/**
 * Holds if the text of comment `c` has the shape of an `if`, `elif` or `else`
 * clause: the keyword (with a condition for `if`/`elif`), a colon and,
 * optionally, a trailing `#` comment.
 */
private predicate if_statement(Comment c) {
    exists(string text | text = c.getText() |
        text.regexpMatch("#(\\S*\\s+)?else:\\s*(#.*)?")
        or
        text.regexpMatch("#(\\S*\\s+)?(el)?if\\s.*:\\s*(#.*)?")
    )
}

/**
 * Holds if the text of comment `c` has the shape of a `for ... in ...:` header,
 * optionally followed by a trailing `#` comment.
 */
private predicate for_statement(Comment c) {
    exists(string text | text = c.getText() |
        text.regexpMatch("#(\\S*\\s+)?for\\s.*\\sin\\s.*:\\s*(#.*)?")
    )
}

/**
 * Holds if the text of comment `c` has the shape of a `with ...:` header,
 * optionally followed by a trailing `#` comment.
 */
private predicate with_statement(Comment c) {
    exists(string text | text = c.getText() |
        text.regexpMatch("#(\\S*\\s+)?with\\s+.*:\\s*(#.*)?")
    )
}

/**
 * Holds if the text of comment `c` has the shape of one of the clauses of a
 * `try` statement: `try:`, `finally:`, or `except:` with an optional single
 * word and an optional `as <word>` part. Each may carry a trailing `#` comment.
 */
private predicate try_statement(Comment c) {
    exists(string text | text = c.getText() |
        text.regexpMatch("#(\\S*\\s+)?finally:\\s*(#.*)?")
        or
        text.regexpMatch("#(\\S*\\s+)?except\\s*(\\w+\\s*(\\sas\\s+\\w+\\s*)?)?:\\s*(#.*)?")
        or
        text.regexpMatch("#(\\S*\\s+)?try:\\s*(#.*)?")
    )
}

/**
 * Gets a column number used as the indentation of the possible code in comment `c`:
 * the start column of `c` plus the offset, within its text, of a character that is
 * neither whitespace nor `#`. Only defined when `maybe_code(c)` holds.
 *
 * NOTE(review): the occurrence argument passed to `regexpFind` is `1`. If occurrence
 * indices are zero-based, this anchors on the second such character rather than the
 * first - confirm against the QL built-in documentation.
 */
private int indentation(Comment c) {
    maybe_code(c) and
    exists(int pos |
        exists(string hit | hit = c.getText().regexpFind("[^\\s#]", 1, pos)) and
        result = c.getLocation().getStartColumn() + pos
    )
}

/**
 * Holds if the text of comment `c` has the shape of a `class <name> ...:` header,
 * optionally followed by a trailing `#` comment.
 */
private predicate class_statement(Comment c) {
    exists(string text | text = c.getText() |
        text.regexpMatch("#(\\S*\\s+)?class\\s+\\w+.*:\\s*(#.*)?")
    )
}

/** Holds if the text of comment `c` contains a triple quote, either `"""` or `'''`. */
private predicate triple_quote(Comment c) {
    exists(string text | text = c.getText() | text.regexpMatch("#.*(\"\"\"|''').*"))
}

/**
 * Holds if `start` is a comment containing a triple quote and `end` is either
 * `start` itself or reachable from it through successive non-empty following
 * comments, none of which contains a triple quote.
 */
private predicate triple_quoted_string_part(Comment start, Comment end) {
    start = end and triple_quote(start)
    or
    exists(Comment prev | triple_quoted_string_part(start, prev) |
        not triple_quote(end) and
        end = non_empty_following(prev)
    )
}

/**
 * Holds if comment `c` might be code: either it is a commented-out comment, or
 * it is none of definitely-not-code, filler, an end-of-line comment, or a
 * file path / URL.
 */
private predicate maybe_code(Comment c) {
    commented_out_comment(c)
    or
    not (
        non_code(c) or
        filler(c) or
        endline_comment(c) or
        file_or_url(c)
    )
}

/**
 * Holds if comment `c` itself looks like a commented-out comment: one or more `#`,
 * then whitespace, then another `#`.
 */
private predicate commented_out_comment(Comment c) {
    exists(string text | text = c.getText() | text.regexpMatch("#+\\s+#.*"))
}

/**
 * Gets the indentation of comment `start`, provided `start` looks like a `def` or
 * `class` header and is not ruled out as code by `non_code`.
 */
private int scope_start(Comment start) {
    not non_code(start) and
    result = indentation(start) and
    (
        class_statement(start)
        or
        def_statement(start)
    )
}

/**
 * Gets the indentation of comment `start`, provided `start` looks like the header
 * of an `if`, `for`, `try` or `with` block (including their follow-on clauses) and
 * is not ruled out as code by `non_code`.
 */
private int block_start(Comment start) {
    not non_code(start) and
    result = indentation(start) and
    (
        with_statement(start)
        or
        try_statement(start)
        or
        for_statement(start)
        or
        if_statement(start)
    )
}

/**
 * Gets the indentation of the scope header `start` when `end` lies inside a
 * docstring that opens directly after that header. `end` is either the first
 * non-empty comment after `start` (which must contain a triple quote), or a later
 * comment reached through successive non-empty following comments that contain
 * no triple quote.
 */
private int scope_doc_string_part(Comment start, Comment end) {
    end = non_empty_following(start) and
    triple_quote(end) and
    result = scope_start(start)
    or
    exists(Comment prev |
        end = non_empty_following(prev) and
        not triple_quote(end) and
        result = scope_doc_string_part(start, prev)
    )
}

/**
 * Gets the indentation of the scope header `start` when `end` belongs to the
 * commented-out scope it introduces. `end` is one of:
 *  - the header itself;
 *  - the triple-quote comment that follows a docstring part;
 *  - a comment following an existing scope part and indented more deeply than
 *    the header.
 */
private int scope_part(Comment start, Comment end) {
    end = start and result = scope_start(start)
    or
    exists(Comment prev | end = non_empty_following(prev) |
        /* `end` closes the docstring that `prev` is part of */
        triple_quote(end) and
        result = scope_doc_string_part(start, prev)
        or
        /* `end` continues the body: it must be indented beyond the header */
        indentation(end) > result and
        result = scope_part(start, prev)
    )
}

/**
 * Gets the indentation of the block header `start` when `end` belongs to the
 * commented-out block it introduces. The first non-empty comment after the header
 * must be indented more deeply than the header; every later comment must either
 * be indented more deeply, or be another block header at the same indentation.
 */
private int block_part(Comment start, Comment end) {
    end = non_empty_following(start) and
    result = block_start(start) and
    indentation(end) > result
    or
    exists(Comment prev |
        end = non_empty_following(prev) and
        result = block_part(start, prev) and
        (
            result = block_start(end)
            or
            indentation(end) > result
        )
    )
}

/**
 * Holds if `end` is part of the commented-out scope headed by `start`, either as
 * part of its body or as part of its docstring.
 */
private predicate commented_out_scope_part(Comment start, Comment end) {
    exists(int indent |
        indent = scope_part(start, end)
        or
        indent = scope_doc_string_part(start, end)
    )
}

/**
 * Holds if comment `c` takes part in a commented-out scope or block, either as
 * the header or as a later part of it.
 */
private predicate commented_out_code(Comment c) {
    exists(Comment other |
        commented_out_scope_part(c, other)
        or
        commented_out_scope_part(other, c)
    )
    or
    exists(Comment other, int indent |
        indent = block_part(c, other)
        or
        indent = block_part(other, c)
    )
}

/**
 * Holds if `start` begins a run of commented-out code comments and `end` is in
 * that run. `start` is commented-out code with no commented-out code comment
 * directly before it; `end` is reached from `start` through successive non-empty
 * following comments that are all commented-out code.
 */
private predicate commented_out_code_part(Comment start, Comment end) {
    start = end and
    commented_out_code(start) and
    not exists(Comment before |
        start = non_empty_following(before) and
        commented_out_code(before)
    )
    or
    exists(Comment prev |
        commented_out_code(end) and
        end = non_empty_following(prev) and
        commented_out_code_part(start, prev)
    )
}

/**
 * Holds if `start` and `end` are the first and last comments of a maximal run of
 * commented-out code comments.
 */
private predicate commented_out_code_block(Comment start, Comment end) {
    commented_out_code_part(start, end) and
    /* A block must be at least 2 comments long. */
    not start = end and
    /* The run is maximal: whatever follows `end` is not commented-out code. */
    not exists(Comment next |
        next = non_empty_following(end) and
        commented_out_code(next)
    )
}

/** A single line comment that appears to be commented out code */
class CommentedOutCodeLine extends Comment {
    CommentedOutCodeLine() { any(CommentedOutCodeBlock b).contains(this) }

    /**
     * Whether this commented-out code line is likely to be example code embedded in a larger comment.
     * This holds when a commented-out code block containing this line is itself likely example code.
     */
    predicate maybeExampleCode() {
        exists(CommentedOutCodeBlock enclosing | enclosing.maybeExampleCode() |
            enclosing.contains(this)
        )
    }
}

/**
 * A block of comments that appears to be commented out code.
 * The block is represented by its first comment.
 */
class CommentedOutCodeBlock extends @py_comment {
    CommentedOutCodeBlock() { exists(Comment last | commented_out_code_block(this, last)) }

    /** Gets a fixed textual description of this block. */
    string toString() { result = "Commented out code" }

    /**
     * Whether this commented-out code block contains the comment c.
     * Membership spreads forwards from the first comment through non-empty following
     * comments, and does not continue past the last comment of the block.
     */
    predicate contains(Comment c) {
        c = this
        or
        exists(Comment before |
            this.contains(before) and
            not commented_out_code_block(this, before) and
            c = non_empty_following(before)
        )
    }

    /** The length of this comment block (in comments) */
    int length() { result = count(Comment member | this.contains(member)) }

    /**
     * Holds if this block spans from the start position (`bl`, `bc`) of its first
     * comment to the end position (`el`, `ec`) of its last comment, in file `filepath`.
     */
    predicate hasLocationInfo(string filepath, int bl, int bc, int el, int ec) {
        exists(Comment last | commented_out_code_block(this, last) |
            last.getLocation().hasLocationInfo(_, _, _, el, ec)
        ) and
        this.(Comment).getLocation().hasLocationInfo(filepath, bl, bc, _, _)
    }

    /**
     * Whether this commented-out code block is likely to be example code embedded in a larger comment.
     * This holds when an enclosing `CommentBlock` is more than twice as long as the
     * summed lengths of the commented-out code blocks it contains.
     */
    predicate maybeExampleCode() {
        exists(CommentBlock enclosing, int code_lines |
            enclosing.contains(this.(Comment)) and
            code_lines = sum(CommentedOutCodeBlock code | enclosing.contains(code.(Comment)) | code.length()) and
            /* This ratio may need fine tuning */
            enclosing.length() > code_lines * 2
        )
    }
}

/**
 * Does c contain the pair of words "s1 s2" with only whitespace between them.
 * `s1` and `s2` are consecutive `\w+` matches in the text of `c`, and no single
 * or double quote character may occur before `s1`.
 */
private predicate word_pair(Comment c, string s1, string s2) {
    exists(string text, int n, int start1, int start2 |
        text = c.getText() and
        s1 = text.regexpFind("\\w+", n, start1) and
        s2 = text.regexpFind("\\w+", n + 1, start2) and
        /* Nothing but whitespace separates the two words */
        text.substring(start1 + s1.length(), start2).regexpMatch("\\s+") and
        /* No quote character precedes the first word */
        text.prefix(start1).regexpMatch("[^'\"]*")
    )
}

/**
 * The comment c cannot be code if it contains `>>>` or `...`, or if it contains
 * a word pair "word1 word2", where word2 is not an operator keyword, and either:
 * 1. word1 is not a keyword:
 *    "x is" could be code, "return y" could be code, but "isnt code" cannot be code.
 * or
 * 2. word1 is a keyword requiring a colon and there is no colon:
 *    "if spam" can only be code if the comment contains a colon.
 * The word-pair rule does not apply to comments of the form
 * `#<non-whitespace> ... # ...`, which may be code followed by a trailing comment.
 */
private predicate non_code(Comment c) {
    /* Don't count doctests as code */
    c.getText().matches("%>>>%")
    or
    c.getText().matches("%...%")
    or
    /* Except comments of the form: # (maybe code) # some comment */
    not c.getText().regexpMatch("#\\S+\\s.*#.*") and
    exists(string first, string second |
        word_pair(c, first, second) and
        not second = operator_keyword() and
        (
            not first = a_keyword()
            or
            first = keyword_requiring_colon() and not c.getText().matches("%:%")
        )
    )
}

/**
 * Holds if comment `c` is pure filler: one or more `#` followed only by
 * whitespace and the decoration characters `*`, `#`, `-`, `_`, `=` and `+`.
 *
 * The hyphen is escaped inside the character class. Unescaped, `#-_` is read as
 * the character range from `#` to `_`, which also covers digits, uppercase
 * letters and most punctuation, so comments such as `# X = 1` were treated as
 * filler and never considered as possible code.
 */
private predicate filler(Comment c) { c.getText().regexpMatch("#+[\\s*#\\-_=+]*") }

/**
 * Gets the first non empty comment following c.
 * `c` itself must be non-empty. The result is the comment returned by
 * `getFollowing()` either on `c` or on the last of a run of empty comments
 * that follows `c`.
 */
private Comment non_empty_following(Comment c) {
    exists(Comment prev |
        prev = c
        or
        prev = empty_following(c)
    |
        not empty(c) and
        result = prev.getFollowing() and
        not empty(result)
    )
}

/**
 * Helper for non_empty_following().
 * Gets an empty comment in the unbroken run of empty comments that follows the
 * non-empty comment `c`.
 */
private Comment empty_following(Comment c) {
    empty(result) and
    (
        /* The run starts directly after `c` ... */
        not empty(c) and result = c.getFollowing()
        or
        /* ... and extends one empty comment at a time. */
        result = empty_following(c).getFollowing()
    )
}

/** Holds if comment `c` consists only of one or more `#` followed by optional whitespace. */
private predicate empty(Comment c) {
    exists(string text | text = c.getText() | text.regexpMatch("#+\\s*"))
}

/**
 * A comment following code on the same line.
 * Holds if some expression has a location starting in the same file and on the
 * same line as comment `c`.
 */
private predicate endline_comment(Comment c) {
    exists(string path, int row |
        c.getLocation().hasLocationInfo(path, row, _, _, _) and
        any(Expr e).getLocation().hasLocationInfo(path, row, _, _, _)
    )
}

/**
 * Holds if comment `c` appears to mention a URL or a file path rather than code.
 * After a prefix free of quote characters, the text must contain one of:
 *  - an `http://`, `https://` or `file://` URL;
 *  - a `/`-separated path ending in a file extension;
 *  - a `\`-separated path ending in a file extension.
 */
private predicate file_or_url(Comment c) {
    exists(string text | text = c.getText() |
        text.regexpMatch("#[^'\"]+(https?|file)://.*")
        or
        text.regexpMatch("#[^'\"]+(/[a-zA-Z]\\w*)+\\.[a-zA-Z]+.*")
        or
        /*
         * The path separator must be an escaped backslash in the regex, which takes
         * four backslashes in the QL string. The previous two-backslash form produced
         * the regex `\[`, matching a literal `[` instead of a backslash.
         */
        text.regexpMatch("#[^'\"]+(\\\\[a-zA-Z]\\w*)+\\.[a-zA-Z]+.*")
    )
}

/**
 * Gets a keyword treated as an operator when it appears as the second word of a
 * word pair: `and`, `as`, `import`, `in`, `is`, `not` or `or`.
 */
private string operator_keyword() {
    result = "and"
    or
    result = "as"
    or
    result = "import"
    or
    result = "in"
    or
    result = "is"
    or
    result = "not"
    or
    result = "or"
}

/**
 * Gets a keyword that is only accepted as the start of code when the comment also
 * contains a colon: `class`, `def`, `elif`, `else`, `except`, `if`, `try` or `while`.
 */
private string keyword_requiring_colon() {
    result = "class"
    or
    result = "def"
    or
    result = "elif"
    or
    result = "else"
    or
    result = "except"
    or
    result = "if"
    or
    result = "try"
    or
    result = "while"
}

/**
 * Gets a keyword that is neither an operator keyword nor a keyword requiring a
 * colon, but may still begin a line of code (for example `return` or `raise`).
 */
private string other_keyword() {
    result = "assert"
    or
    result = "del"
    or
    result = "exec"
    or
    result = "finally"
    or
    result = "for"
    or
    result = "from"
    or
    result = "global"
    or
    result = "lambda"
    or
    result = "print"
    or
    result = "raise"
    or
    result = "return"
    or
    result = "with"
    or
    result = "yield"
}

/** Gets any keyword from the three keyword groups: operator, colon-requiring, or other. */
private string a_keyword() {
    result = operator_keyword()
    or
    result = keyword_requiring_colon()
    or
    result = other_keyword()
}
